library(knitr)
# Suppress warnings and messages in the output of every knitr chunk.
# Spell out FALSE: `F` is an ordinary variable that can be reassigned.
opts_chunk$set(warning = FALSE, message = FALSE)
library(ggplot2)
# library gridextra to arrange multiple plot
library(gridExtra)
library(grid)
# Showcase figure: a single plot that exercises data, aes mapping, geom, stat,
# position, facet, scale, guides, annotation, and theme.
ggplot(mpg, aes(factor(cyl), cty)) +
  geom_point(aes(color = drv, size = displ), position = "jitter") +
  stat_boxplot(fill = NA) +
  facet_wrap(~year) +
  # The layers above carry the main information with otherwise default
  # settings; everything below fine-tunes the presentation.
  # === scales: what is shown on the axes and in the legends ===
  scale_x_discrete(
    breaks = c(4, 5, 6, 8),
    labels = c("Four", "Five", "Six", "Eight")
  ) +
  scale_y_continuous(
    limits = c(4, 36),
    breaks = 1:4 * 8
  ) +
  scale_color_manual(
    breaks = c("4", "f", "r"),
    values = c("4" = "red", "f" = "blue", "r" = "cyan"),
    labels = c("four-wheel", "front-wheel", "rear-wheel")
  ) +
  scale_size_area(
    limits = c(2, 7),
    max_size = 4
  ) +
  # === guides: how each legend is drawn ===
  guides(
    size = guide_legend(
      title.position = "top",
      nrow = 2,
      byrow = TRUE,
      override.aes = list(shape = 1),
      reverse = TRUE
    ),
    color = guide_legend(
      order = 1,
      nrow = 3,
      override.aes = list(size = 3)
    )
  ) +
  # === labs(): all titles and labels in one place ===
  labs(
    title = "This plot displays all major ggplot components",
    subtitle = "including data, aes mapping, geom, stat, position, facet, scale, guides, annotation, and theme",
    caption = "Source: what so ever",
    x = "Cylinders",
    y = "City Mileage (miles/gallon)",
    color = NULL,
    size = "displacement"
  ) +
  # === extra information ===
  # annotate() adds geoms manually. Use geom_text() instead to label a
  # specific facet panel.
  annotate("text", x = 1:2, y = 8, label = "haha", hjust = 0, vjust = 1) +
  # === theme(): non-data appearance ===
  theme(
    plot.background = element_rect(fill = "#F5E6E3"),
    # "mono" is the device-independent name of the monospaced family;
    # "monospace" is not a standard R font family.
    plot.title = element_text(family = "mono"),
    plot.subtitle = element_text(face = "italic"),
    panel.background = element_rect(fill = "lightblue", color = "red"),
    panel.grid.major.y = element_line(color = "grey95", size = 0.2),
    panel.grid.minor.y = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_text(family = "mono"),
    legend.position = "top",
    legend.key = element_blank(),
    legend.margin = margin(0, 0, 0, 0),
    legend.background = element_blank(),
    strip.background = element_blank(),
    strip.text = element_text(size = 12)
  )
labs(), scale_xxx(), and guides(): Legend and axis are the most complex aesthetic system in ggplot. There are many ways to present them, which makes ggplot powerful and, on the downside, confusing. Here we try to standardize the method. scale_xxx() is able to control every component of axis and legend. For better manipulation, however, I'd like to break the components into three parts, which are controlled by the following three functions:
labs() is for all titles; scale_xxx() determines what to show in axis and legend; guides() determines how to show them. Inside guides(), each aes is guided by guide_legend() or guide_colorbar(). For more examples, see http://ggplot2.tidyverse.org/reference/guide_legend.html, http://ggplot2.tidyverse.org/reference/guide_colourbar.html, and http://ggplot2.tidyverse.org/reference/guides.html.
# Demonstrates the division of labour between scale_xxx() (what to display),
# guides() (how to display it) and labs() (titles).
ggplot(mpg, aes(hwy, cty, color = factor(cyl), shape = drv)) +
  geom_point(aes(size = displ)) +
  # --- what to display ---
  scale_x_continuous(
    limits = c(0, 50),
    breaks = c(0, 20, 40),
    labels = c("zero", "twenty", "forty"),
    minor_breaks = c(5, 10, 15)
  ) +
  scale_y_continuous(labels = NULL) + # NULL removes the axis labels
  # colors are assigned by default; only breaks and labels are customized
  scale_color_discrete(
    breaks = c(4, 5, 6, 8),
    labels = c("four", "five", "six", "eight")
  ) +
  # manually select which drv values get a shape, and which shape
  scale_shape_manual(
    limits = c("f", "r"),
    values = c(f = 0, r = 2)
  ) +
  # --- how to display ---
  guides(
    size = "none", # hide the size legend
    color = guide_legend(
      direction = "horizontal",
      title.position = "top",
      nrow = 2,
      byrow = TRUE, # fill keys row by row; the default is by column
      order = 1 # first legend to show
    ),
    shape = guide_legend(
      direction = "vertical",
      reverse = TRUE # reverse the order of the legend keys
    )
  ) +
  # --- all titles ---
  labs(
    title = "Use labs(), scale_xxx(), guides() for legend and axis",
    subtitle = "scale_xxx for what to display, guides for how to display, labs for titles",
    x = "highway mileage",
    y = "city mileage",
    color = "cylinder",
    shape = "drive train",
    size = "haha"
  )
# aes() accepts data-frame columns, same-length vectors, and constants.
g1 <- ggplot(mpg) +
  geom_jitter(aes(
    x = class,                    # column mpg$class
    y = rnorm(nrow(mpg)),         # a vector with one value per row
    size = seq_len(nrow(mpg)),    # seq_len() stays correct for zero rows
    color = drv,                  # column mpg$drv
    alpha = 0.5                   # constant
  )) +
  labs(subtitle = "as long as the same length vector or constant")
# aes() built from plain vectors, with no data frame at all
g2 <- ggplot() +
  geom_point(
    aes(
      x = 1:3, y = 3:1,
      color = c("red", "blue", "green"),
      shape = letters[1:3]
    ),
    size = 5
  ) +
  scale_color_identity(guide = "legend") +
  labs(subtitle = "can even plot from pure vectors but better reserve for simple plot")
grid.arrange(
  g1, g2,
  nrow = 1,
  top = textGrob("aes are very flexible", gp = gpar(fontsize = 16))
)
# Map a constant string to color in each layer so that all layers share one
# color scale and one legend, then assign the colors manually.
ggplot(mpg, aes(hwy, cty)) +
  geom_point() +
  stat_smooth(method = "lm", aes(color = "lm")) +
  # `method` was misspelled as `meothd`, so it was ignored as an unknown
  # parameter instead of selecting the smoother
  stat_smooth(method = "loess", aes(color = "loess")) +
  geom_line(aes(hwy, 10, color = "flat")) +
  scale_color_manual(values = c(lm = "red", loess = "blue", flat = "orange")) +
  labs(title = "control same aes in different layers", color = "smooth")
Aes such as color and shape automatically divide data into groups for individual geoms as well as collective geoms. The group aes overrides these default groups for collective geoms. The group aes is only used to group data; it does not come with a legend.
# The group aes overrides the default grouping derived from other aes ====
df <- data.frame(
  x = 1:9,
  y = c(1, 3, 2, 5, 3, 6, 5, 2, 6),
  z1 = c("a", "b", "c"), # recycled to 9 rows
  z2 = rep(c("A", "B", "C"), each = 3)
)
# Left panel: lines are grouped by the color aes (z1).
g1 <- ggplot(df, aes(x, y, color = z1)) +
  geom_point(size = 3) +
  geom_line() +
  annotate(
    "text", x = 1, y = 7, label = "geom_line(aes(color = z1))",
    hjust = "inward", vjust = "inward"
  ) +
  labs(subtitle = "without group aes, color is the default group for line plot\n")
# Right panel: lines are grouped by z2 while still colored by z1. The subtitle
# used to repeat the left panel's text, which contradicted this plot.
g2 <- ggplot(df, aes(x, y, color = z1)) +
  geom_point(size = 3) +
  geom_line(aes(group = z2)) +
  annotate(
    "text", x = 1, y = 7, label = "geom_line(aes(color = z1, group = z2))",
    hjust = "inward", vjust = "inward"
  ) +
  labs(subtitle = "with group aes, the line is grouped by z2 instead of color\n")
grid.arrange(
  g1, g2,
  nrow = 1,
  top = textGrob(
    "group aes sets groups for collective geoms and overrides default group from other aes",
    gp = gpar(fontsize = 16)
  )
)
# Every row has its own color value, so a constant group aes is supplied to
# the line layer to join all points into one line.
df <- data.frame(x = 1:4, y = 1:4, color = letters[1:4])
ggplot(df, aes(x, y, color = color)) +
  # the group value can be any constant number or string
  geom_line(mapping = aes(group = 1111)) +
  geom_point(size = 5) +
  labs(
    subtitle = "color aes generate a default group that has only one observation for each group,\nwhich is not enough for line plot. force constant group for line plot",
    title = "force a constant group"
  )
Use ?stat_xxx to find out what variables the stat computes, which can then be used in aes() for plotting.
# Draw the density variable computed by stat_bin() as a line over 10 bins.
ggplot(mpg, aes(hwy)) +
  stat_bin(
    mapping = aes(y = ..density..),
    geom = "line",
    bins = 10
  ) +
  labs(title = "..density.. for density distribution")
# The two plots below are built from the same layers, written once through
# stat_count(geom = ...) and once through geom_xxx(stat = "count").
g1 <- ggplot(mpg, aes(drv)) +
  stat_count(geom = "bar") + # "bar" is the default geom
  stat_count(geom = "point", color = "red", size = 5) +
  stat_count(geom = "line", mapping = aes(group = 1), size = 1, color = "blue") +
  labs(subtitle = "stat_xxx function where geom is a argument")
g2 <- ggplot(mpg, aes(drv)) +
  geom_bar(stat = "count") +
  geom_point(stat = "count", color = "red", size = 5) +
  geom_line(stat = "count", mapping = aes(group = 1), size = 1, color = "blue") +
  labs(subtitle = "geom_xxx function where stat is a argument")
grid.arrange(
  g1, g2,
  nrow = 1,
  top = textGrob(
    "geom_xxx and stat_xxx functions generate the same plot",
    gp = gpar(fontsize = 16)
  )
)
Focus on stat_xxx functions.
# One continuous variable: the same binned counts drawn with three geoms.
ggplot(mpg, aes(hwy)) +
  stat_bin(geom = "bar") + # default, equivalent to geom_histogram()
  stat_bin(geom = "line", color = "blue") + # equivalent to geom_freqpoly()
  stat_bin(geom = "point", size = 5, color = "red") +
  labs(title = "count one continuous variable")
# Two continuous variables: count observations in rectangular and hexagonal
# 2-D bins.
df <- data.frame(x = rnorm(1000), y = rnorm(1000))
g1 <- ggplot(df, aes(x, y)) +
  stat_bin_2d(geom = "tile", bins = 30) +
  # the subtitle now names the geom that is actually passed above
  labs(subtitle = "geom is tile")
g2 <- ggplot(df, aes(x, y)) +
  stat_bin_hex(geom = "hex", bins = 30) +
  labs(subtitle = "geom is hex")
grid.arrange(
  g1, g2,
  nrow = 1,
  top = textGrob("count two continuous variables", gp = gpar(fontsize = 16))
)
# 1-D summary statistics of y, computed per discrete x value (left) or per
# bin of a continuous x (right).
# discrete x and continuous y
g1 <- ggplot(mpg, aes(drv, hwy)) +
  geom_point(size = 3, color = "grey70") +
  # maximum of each group drawn as a tile
  stat_summary(fun.y = max, geom = "tile", fill = "red", alpha = 0.1) +
  # user-defined summary function
  stat_summary(
    fun.y = function(s) mean(s + 5), geom = "point",
    color = "green", size = 3
  ) +
  # range defined by mean +/- one standard deviation
  stat_summary(
    fun.y = mean,
    fun.ymin = function(s) mean(s) - sd(s),
    fun.ymax = function(s) mean(s) + sd(s),
    geom = "pointrange", color = "red" # pointrange is the default geom
  ) +
  labs(subtitle = "discrete x and continuous y")
# continuous x and y
g2 <- ggplot(mpg, aes(cty, hwy)) +
  geom_point(size = 3, color = "grey70") +
  stat_summary_bin(
    fun.y = max, geom = "tile", bins = 10,
    fill = "red", alpha = 0.3
  ) +
  # user-defined summary function
  stat_summary_bin(
    fun.y = function(s) mean(s + 5), geom = "point",
    color = "green", size = 3, bins = 10
  ) +
  # range defined by mean +/- one standard deviation
  stat_summary_bin(
    fun.y = mean,
    fun.ymin = function(s) mean(s) - sd(s),
    fun.ymax = function(s) mean(s) + sd(s),
    geom = "pointrange", bins = 10,
    color = "red" # pointrange is the default geom
  ) +
  labs(subtitle = "continuous x and continuous y")
grid.arrange(
  g1, g2,
  nrow = 1,
  top = textGrob(
    "1D summary statistics -- flexible and powerful",
    gp = gpar(fontsize = 16)
  )
)
# Summarize a third variable z inside 2-D rectangular and hexagonal bins.
df <- data.frame(x = rnorm(10000), y = rnorm(10000), z = rnorm(10000))
# minimum of z in each rectangular bin
g1 <- ggplot(df, aes(x, y, z = z)) +
  stat_summary_2d(fun = min, na.rm = TRUE, bins = 20) +
  labs(subtitle = "plot min of z in each 2-D bin")
# maximum of z in each hexagonal bin
g2 <- ggplot(df, aes(x, y, z = z)) +
  stat_summary_hex(fun = max, na.rm = TRUE, bins = 20) +
  labs(subtitle = "plot max of z in each 2D hex")
grid.arrange(
  g1, g2,
  nrow = 1,
  top = textGrob("plot 2D count and summary statistics")
)
# Compare an unweighted linear fit (red line, light green band) with a fit
# weighted by total population (default colors).
ggplot(midwest, aes(percwhite, percbelowpoverty)) +
  geom_point(aes(size = poptotal / 1e6)) +
  stat_smooth(method = lm, size = 1, color = "red", fill = "lightgreen") +
  stat_smooth(aes(weight = poptotal), method = lm, size = 1) +
  scale_size_area(guide = "none") +
  labs(
    title = "Apply weight in geom_smooth",
    subtitle = "weight is an aes"
  )
# Five panels of the same counted bar chart, differing only in `position`.
# stack: the default, bars on top of each other
g1 <- ggplot(mpg, aes(drv, fill = factor(year))) +
  stat_count(position = "stack") +
  labs(subtitle = 'position = "stack", on top of each other')
# dodge: bars side by side
g2 <- ggplot(mpg, aes(drv, fill = factor(year))) +
  stat_count(position = "dodge") +
  labs(subtitle = 'position = "dodge", side by side')
# identity: every bar starts from zero, so bars overlap
g3 <- ggplot(mpg, aes(drv, fill = factor(year))) +
  stat_count(alpha = 0.5, position = "identity") +
  labs(subtitle = 'position = "identity", all start from zero')
# fill: bars add up to 1 (100%)
g4 <- ggplot(mpg, aes(drv, fill = factor(year))) +
  stat_count(position = "fill") +
  labs(subtitle = 'position = "fill", add up to 100%')
# jitter: shown for completeness, not useful for bars
g5 <- ggplot(mpg, aes(drv, fill = factor(year))) +
  stat_count(alpha = 0.5, position = "jitter") +
  labs(subtitle = 'position = "jitter", not useful for bar plot')
grid.arrange(
  g1, g2, g3, g4, g5,
  nrow = 2,
  top = textGrob("Five position choices", gp = gpar(fontsize = 16))
)
position_jitterdodge() dodges points within groups and then adds a small amount of noise. It is often used to overlay the data points on geom_boxplot().
# Boxplots per drv and year, with the raw points dodged and jittered on top.
ggplot(mpg, aes(drv, hwy, color = factor(year))) +
  geom_boxplot() +
  geom_point(
    position = position_jitterdodge()
  ) +
  labs(
    title = "position_jitterdodge() dodges jittered points within groups"
  )
position_nudge() may be useful for comparing plots side by side in different layers.
# Grey points at their original position, and the same points drawn again
# shifted by 0.1 in x and 1 in y.
ggplot(mpg, aes(drv, hwy)) +
  geom_point(color = "grey") +
  geom_point(
    position = position_nudge(x = 0.1, y = 1)
  ) +
  labs(
    title = "position_nudge() shifts the whole plot"
  )
When both x and y are provided and position = "dodge" is used, the fill aes must be categorical.
# Six geom_col() panels: continuous fill (top row) versus categorical fill
# (bottom row), each with position dodge, stack, and fill.
library(ggplot2)
df <- data.frame(x = c("A", "A", "B", "B", "B", "C"), y = 1:6, z = 1:6)
g1 <- ggplot(df, aes(x, y, fill = z)) +
  geom_col(position = "dodge") +
  labs(subtitle = "continuous fill, dodge not working")
g2 <- ggplot(df, aes(x, y, fill = z)) +
  geom_col(position = "stack") +
  labs(subtitle = "continuous fill, stack is ok")
g3 <- ggplot(df, aes(x, y, fill = z)) +
  geom_col(position = "fill") +
  labs(subtitle = "continuous fill, fill is ok")
g4 <- ggplot(df, aes(x, y, fill = factor(z))) +
  geom_col(position = "dodge") +
  labs(subtitle = "categorical fill, dodge is ok")
g5 <- ggplot(df, aes(x, y, fill = factor(z))) +
  geom_col(position = "stack") +
  labs(subtitle = "categorical fill, stack is ok")
g6 <- ggplot(df, aes(x, y, fill = factor(z))) +
  geom_col(position = "fill") +
  labs(subtitle = "categorical fill, fill is ok")
grid.arrange(
  g1, g2, g3, g4, g5, g6,
  nrow = 2,
  top = textGrob("Dodge does not work with continuous or no fill")
)
# Dodged text labels on dodged bars: the position given as the string "dodge"
# (left) versus position_dodge() with an explicit width (right).
library(ggplot2)
df <- data.frame(
  x = c("A", "A", "B", "B", "B", "C"),
  y = 1:6,
  z = letters[1:6]
)
g1 <- ggplot(df, aes(x, y)) +
  geom_col(aes(fill = z), position = "dodge") +
  geom_text(
    aes(label = z, color = z),
    position = "dodge", vjust = -0.2
  ) +
  labs(subtitle = 'position = "dodge" does not align text correctly')
# the width of position_dodge() has to be tuned for the text layer
g2 <- ggplot(df, aes(x, y)) +
  geom_col(aes(fill = z), position = "dodge") +
  geom_text(
    aes(label = z, color = z),
    position = position_dodge(width = 0.9),
    vjust = -0.2
  ) +
  labs(subtitle = 'position = position_dodge(width = 0.9) works')
grid.arrange(
  g1, g2,
  nrow = 1,
  top = textGrob("Use position_dodge() to dodge text")
)
# Vector field: each observation is a red point with a blue arrow from
# (lat, long) to (lat + delta_lat, long + delta_long).
ggplot(seals, aes(lat, long)) +
  geom_point(size = 0.5, color = "red") +
  geom_segment(
    aes(x = lat, y = long, xend = lat + delta_lat, yend = long + delta_long),
    color = "blue",
    arrow = arrow(angle = 20, length = unit(1, "mm"))
  ) +
  labs(
    title = "Draw a vector field with geom_segment()",
    subtitle = "arrows are drawn with arrow() function with specified angle and length"
  )
# Presidential terms drawn as shaded time blocks, with the unemployment ratio
# from `economics` as a line on top.
ggplot(data = presidential) +
  # --- layers, in drawing order ---
  # ymin/ymax of -Inf/Inf make each block span the full height of the panel
  geom_rect(
    aes(xmin = start, xmax = end, ymin = -Inf, ymax = Inf, fill = party),
    alpha = 0.2
  ) +
  geom_text(
    aes(x = start, y = 0.0001, label = name),
    angle = 90, vjust = 1, hjust = 0
  ) +
  # unemployment layer from a second data set
  geom_line(data = economics, aes(date, unemploy / pop)) +
  # --- scales ---
  # breaks at every term boundary; an alternative is a regular sequence:
  # breaks = seq(min(presidential$start), max(presidential$end), "8 years")
  scale_x_date(
    expand = c(0, 0),
    breaks = unique(c(presidential$start, presidential$end))
  ) +
  scale_y_continuous(
    expand = c(0, 0),
    labels = scales::percent_format()
  ) +
  scale_fill_manual(values = c("Democratic" = "blue", "Republican" = "red")) +
  # --- titles ---
  labs(
    title = "Draw time blocks with geom_rect()",
    subtitle = "scale ymin and ymax with -Inf and Inf to match other data",
    y = "Unemployed population / total population"
  ) +
  # --- theme ---
  theme_bw() +
  theme(
    panel.grid.major.x = element_line(color = "grey90", size = 0.2),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = -30, hjust = 0, vjust = 1)
  )
In a boxplot, the box spans the 25% to 75% quantiles with the 50% quantile (median) marked inside, and by default the whiskers extend up to 1.5 times the box height above and below the box. Outliers are the points beyond the whiskers and are plotted individually. For a normal distribution, less than 1% of the data points are outliers.
# 4000 normal draws split over four groups: jittered raw points in red with a
# transparent boxplot on top, so the outlier points can be compared with the data.
set.seed(123)
df <- data.frame(x = letters[1:4], y = rnorm(4000))
ggplot(df, aes(x, y)) +
  geom_jitter(width = 0.3, height = 0, color = "red", alpha = 0.3) +
  geom_boxplot(outlier.size = 3, outlier.alpha = 0.3, fill = NA) +
  labs(
    title = "Boxplot: explain the five summary statistics and outliers",
    subtitle = "Less than 1% are outliers in normal distribution, as seen in the plots",
    x = NULL,
    y = NULL
  )
# Default use of geom_bar(): count the rows of one categorical variable.
ggplot(mpg, aes(manufacturer, fill = drv)) +
  geom_bar(position = "stack") + # "stack" is the default position
  labs(
    title = "bar plot of the count of a categorical variable, aes(categorical_variable)",
    subtitle = "plotted from raw data before summary statistics"
  )
# geom_bar() can also draw already-summarized data when stat = "identity".
df <- data.frame(x = letters[1:4], y = c(2, 1, 5, 3))
ggplot(df, aes(x, y)) +
  geom_bar(stat = "identity") +
  labs(
    title = "bar plot from two variables, aes(x, y)",
    subtitle = "after summary statistics. no statistics in the plot so set stat = 'identity'"
  )
# Data for the area plot: three groups observed at x = 1..4.
df <- data.frame(
  x = rep(1:4, 3),
  y = c(4, 3, 1, 6, 1, 2, 1, 3, 1, 1, 1, 1),
  group = rep(c("A", "C", "B"), each = 4)
)
# geom_area() plots the first level at the top by default. To start from the
# bottom, reverse the levels.
# Derive the levels through factor() first: when data.frame() keeps strings as
# character (the default since R 4.0), levels(df$group) is NULL and the
# original call turned every group into NA.
df$group <- factor(df$group, levels = rev(levels(factor(df$group))))
# Stacked area plot with outlined ribbons.
ggplot(df, aes(x, y, fill = group)) +
  # color and size apply to the boundary lines of each area
  geom_area(size = 0.5, color = "grey50") +
  labs(
    subtitle = "by default, the first level is top which need to be reversed for better view",
    title = "Plot ribbons with geom_area()"
  )
“inward” automatically gives hjust and vjust a value of 0 or 1 according to the (x, y) position. For example, if (x, y) is at the top right relative to the center, then hjust = 1 and vjust = 1.
# Text placed with hjust/vjust = "inward", once via geom_text() and once via
# annotate().
df <- data.frame(x = c(1, 1.9, 3), y = c(1, 1.9, 3))
ggplot(df, aes(x, y)) +
  geom_point(color = "red") +
  # geom_text() takes its positions from a data frame
  geom_text(
    data = data.frame(x = c(1, 1.9, 3), y = c(1, 1.9, 3)),
    mapping = aes(x, y),
    label = c(
      "at corner inward make\nposition inward",
      "near middle the text \nstay towards center\nunless exact center",
      "no matter what corner\ninward is inward"
    ),
    hjust = "inward",
    vjust = "inward"
  ) +
  # annotate("text", ...) adds text at an arbitrary position
  annotate(
    "text",
    x = 1, y = 3,
    label = "inward works for labels created with\nannotate",
    hjust = "inward", vjust = "inward", color = "red"
  ) +
  labs(
    title = 'Add text with geom_text or annotate("text", ...)',
    subtitle = '"inward" works best near corners'
  )
NO true 3D plot in ggplot.
# Surface plots need x, y and z: density is drawn over (eruptions, waiting)
# as contour lines and as a raster.
g1 <- ggplot(faithfuld, aes(eruptions, waiting)) +
  geom_contour(mapping = aes(z = density, colour = ..level..)) +
  labs(subtitle = "contour plot")
g2 <- ggplot(faithfuld, aes(eruptions, waiting)) +
  geom_raster(mapping = aes(fill = density)) +
  labs(subtitle = "raster plot")
grid.arrange(
  g1, g2,
  nrow = 1,
  top = textGrob(
    "ggplot supports surface plots but not true 3D plot",
    gp = gpar(fontsize = 16)
  )
)
# Known means and standard errors drawn with several interval geoms; all of
# them read ymin and ymax from the plot-level aes.
df <- data.frame(x = 1:3, y = c(18, 11, 16), se = c(1.2, 1.5, 1.0))
ggplot(df, aes(x, y, ymin = y - se, ymax = y + se)) +
  # ribbon and line for continuous x
  geom_smooth(stat = "identity") +
  # error bars
  geom_errorbar(size = 6, width = 0.2) +
  # point range and line range
  geom_pointrange(color = "red", size = 3) +
  geom_linerange(linetype = "solid", size = 1, color = "blue") +
  # box around the interval
  geom_crossbar(width = 0.3, color = "green") +
  labs(
    title = "Plot errors when knowing means and errors",
    subtitle = "aes takes ymin and ymax. There are many way to make error plots."
  )
# Four views of the distribution of diamond depth by cut, each limited to
# the x range 58-68.
g1 <- ggplot(diamonds, aes(depth)) +
  geom_histogram(mapping = aes(fill = cut)) +
  xlim(58, 68) +
  labs(subtitle = "traditional histogram")
g2 <- ggplot(diamonds, aes(depth)) +
  geom_histogram(
    mapping = aes(fill = cut),
    position = "fill", binwidth = 0.1, na.rm = TRUE
  ) +
  xlim(58, 68) +
  labs(subtitle = "stacked histogram")
# geom_freqpoly() is colored with the color aes; fill does not work for it
g3 <- ggplot(diamonds, aes(depth)) +
  geom_freqpoly(mapping = aes(color = cut), binwidth = 0.1, na.rm = TRUE) +
  xlim(58, 68) +
  labs(subtitle = "line plot of histogram")
g4 <- ggplot(diamonds, aes(depth)) +
  geom_density(
    mapping = aes(fill = cut, color = cut),
    alpha = 0.2, na.rm = TRUE
  ) +
  xlim(58, 68) +
  labs(subtitle = "density distribution")
grid.arrange(
  g1, g2, g3, g4,
  nrow = 2,
  top = textGrob("Various way to plot distribution", gp = gpar(fontsize = 16))
)
# 2000 random points binned into rectangles and into hexagons; both panels
# start from the same base plot with the axis titles removed.
df <- data.frame(x = rnorm(2000), y = rnorm(2000))
norm <- ggplot(df, aes(x, y)) +
  xlab(NULL) +
  ylab(NULL)
g1 <- norm +
  geom_bin2d(bins = 30) +
  labs(subtitle = "geom_bin2d() draw rectangles")
g2 <- norm +
  geom_hex(bins = 30, size = 0.1, color = "grey30") +
  labs(subtitle = "geom_hex() draw hex, considered better generally")
grid.arrange(
  g1, g2,
  nrow = 1,
  top = textGrob("turn dense scattered plot into surface density plot")
)
# Apply a different transformation to each scale (x, y, color) while the
# data frame itself stays untouched.
df <- data.frame(x = c(0.1, 1, 1000), y = 1:3, z = c(0.1, 2, 2000))
ggplot(df, aes(x, y, color = z)) +
  geom_point() +
  scale_x_continuous(trans = "log10") +
  scale_y_continuous(trans = "reciprocal") +
  scale_color_continuous(trans = "sqrt") +
  labs(
    subtitle = "there are more to offer than just log10",
    title = "Transform scale not data"
  )
# Legend keys inherit constant (non-aes) settings of the layer such as size
# and alpha. guide_legend(override.aes = ...) replaces them in the legend only.
df <- data.frame(x = c(20, 30, 40), y = c(30, 20, 15), z = c("aa", "bb", "cc"))
p <- ggplot(df) +
  geom_point(aes(x, y, color = z), alpha = 0.3, size = 10)
# left: the untouched legend, as large and as faint as the points
g1 <- p +
  labs(subtitle = "default aes: too big and too dim")
# right: size, alpha and even shape overridden for the legend keys
g2 <- p +
  scale_color_discrete(
    guide = guide_legend(
      override.aes = list(size = 3, alpha = 1, shape = 15)
    )
  ) +
  labs(subtitle = "override aes: all applicable aes can be overridden")
grid.arrange(
  g1, g2,
  nrow = 1,
  top = textGrob(
    "Override aes in legend if default is not desirable",
    gp = gpar(fontsize = 16)
  )
)
# Format axis and legend labels, either with helpers from the scales package
# or manually with paste0().
ggplot(mpg, aes(hwy, cty, color = drv)) +
  geom_point() +
  # formatters from the scales package
  scale_x_continuous(
    limits = c(10, 50),
    breaks = c(10, 30, 50),
    labels = scales::percent_format()
  ) +
  # Pass the symbol by name: in current versions of scales the first
  # positional argument of dollar_format() is `accuracy`, not `prefix`.
  # "$" can be replaced with any characters.
  scale_y_continuous(labels = scales::dollar_format(prefix = "$")) +
  # or build the labels manually with paste0()
  scale_color_discrete(
    breaks = c("f", "4", "r"),
    labels = paste0("&_", c("f", "4", "r"), "_&")
  ) +
  labs(
    title = "Add special format in scales such as $ and %",
    subtitle = 'packages "scales" get many of these work done but can also be achieved manually\n for example in the legend below'
  )
# The outer product (1:10) %o% 10^(0:4) yields 1..10, 10..100, ... which
# serve as minor breaks of the log10 x scale.
df <- data.frame(x = c(1, 50, 300, 5000), y = 1:4)
ggplot(df, aes(x, y)) +
  geom_point() +
  scale_x_continuous(
    minor_breaks = (1:10) %o% 10^(0:4),
    trans = "log10"
  ) +
  labs(
    subtitle = "great application of %o% operator",
    title = "Draw minor ticks for log10 scale"
  )
# Control the empty space around the data with the `expand` argument of the
# position scales.
ggplot(map_data("state"), aes(long, lat, group = group)) +
  geom_path() +
  # no extra space along x; compare with the y axis
  scale_x_continuous(expand = c(0, 0)) +
  # extra space along y: c(multiply the data range by 0.1, then add 1)
  scale_y_continuous(expand = c(0.1, 1)) +
  coord_map() +
  theme(
    panel.background = element_rect(fill = NA, color = "red"),
    panel.grid = element_blank(),
    axis.text = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank()
  ) +
  labs(
    title = "Remove empty space around a plot",
    subtitle = "by default, extra space is added to the limit of data. it can be removed"
  )
# Give two plots of different subsets the same x range and the same color
# legend by passing identical limits to their scales.
fwd <- subset(mpg, drv == "f")
rwd <- subset(mpg, drv == "r")
# every class value in the full data; used as the discrete color limits
class <- unique(mpg$class)
g1 <- ggplot(fwd, aes(displ, hwy, color = class)) +
  geom_point() +
  scale_x_continuous(limits = c(1, 7)) +
  # for a discrete scale, limits is a vector of categorical values
  scale_color_discrete(limits = class) +
  labs(
    title = "First plot",
    subtitle = 'show only for drv == "f"'
  )
g2 <- ggplot(rwd, aes(displ, hwy, color = class)) +
  geom_point() +
  scale_x_continuous(limits = c(1, 7)) +
  scale_color_discrete(limits = class) +
  labs(
    title = "Second plot",
    subtitle = 'show only for drv == "r"'
  )
# show the two plots side by side
grid.arrange(
  g1, g2,
  nrow = 1,
  top = textGrob(
    "Forced match of axis and legend for two ggplots that would be different by default\n",
    gp = gpar(fontsize = 16)
  )
)
# Missing values are grey by default; na.value of the fill scale changes
# that.
df <- data.frame(x = 1, y = 1:5, z = c(1, 3, 2, NA, 5))
p <- ggplot(df, aes(x, y)) +
  geom_tile(aes(fill = z), size = 5)
g1 <- p +
  labs(subtitle = "NA is grey by default")
# na.value = NA means no color, i.e. transparent
g2 <- p +
  scale_fill_gradient(na.value = NA) +
  labs(subtitle = "set NA to be transparent")
# any color name works as na.value
g3 <- p +
  scale_fill_gradient(na.value = "red") +
  labs(subtitle = "set NA to be red")
grid.arrange(
  g1, g2, g3,
  nrow = 1,
  top = textGrob("Display colors for missing values", gp = gpar(fontsize = 16))
)
# ColorBrewer palettes are predefined; users cannot define their own. Each
# palette offers a limited number of colors, so 13 categories are plotted to
# show where each palette runs out. Grey points underneath mark every
# position.
n <- 13
df <- data.frame(x = 1:n, y = 1:n, z = letters[1:n])
g <- ggplot(df, aes(x, y)) +
  geom_point(size = 6, color = "grey90") +
  geom_point(size = 6, aes(color = z))
# The `type` argument makes no difference once a palette is named, e.g.
# g + scale_color_brewer(type = "div" , palette = "Set1")
# g + scale_color_brewer(type = "qual", palette = "Set1")
# Set1: the most distinguishable colors, 9 at most
g1 <- g + scale_color_brewer(palette = "Set1") +
  labs(subtitle = "palette Set1 only allows 9 colors")
# Greens: shades of a single color, 9 at most
g2 <- g + scale_color_brewer(palette = "Greens") +
  labs(subtitle = "shades of one color, 9 shades max")
# PuOr: shades between two colors at the limits, 11 at most
g3 <- g + scale_color_brewer(palette = "PuOr") +
  labs(subtitle = "palette PuOr allows 11 colors")
grid.arrange(
  g1, g2, g3,
  nrow = 1,
  top = textGrob(
    "Color brewer allows limited number of colors in a palette",
    gp = gpar(fontsize = 16)
  )
)
# scale_color_manual(values = ...) takes a vector of colors. If the vector is
# named, the names are the factor values of the mapped aes (z in this
# example).
# The nine colors of the ColorBrewer Set1 palette ...
set1 <- c(
  "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00", "#FFFF33",
  "#A65628", "#F781BF", "#999999"
)
# ... repeated ten times so that more than nine categories can be colored
color_palette <- rep(set1, 10)
g +
  scale_color_manual(values = color_palette) +
  labs(
    subtitle = "can be any colors of choice",
    title = "Use hand-picked colors"
  )
# Limits can be set on continuous and on categorical scales alike.
ggplot(mpg, aes(drv, hwy, color = class)) +
  geom_jitter(width = 0.2) +
  # categorical axis; the helper xlim("f", "r") does the same
  scale_x_discrete(limits = c("f", "r")) +
  # continuous axis; the helper ylim(NA, 30) does the same
  scale_y_continuous(limits = c(NA, 30)) +
  # color has no helper function. Values outside the limits take the
  # na.value color, which is a handy way to highlight selected data.
  scale_color_discrete(
    na.value = "grey80",
    limits = c("2seater", "midsize")
  ) +
  labs(
    title = "set limit can selectively display data",
    subtitle = "especially when set limits for discrete colors as not-selected are in grey"
  )
# Four ways to label a date axis, all on the same personal-savings-rate line.
base <- ggplot(economics, aes(date, psavert)) +
  geom_line(na.rm = TRUE) +
  labs(x = NULL, y = NULL)
# default date axis
g1 <- base + labs(subtitle = "default date")
# two-digit years every five years: 95, 00, 05 ...
g2 <- base +
  scale_x_date(date_labels = "%y", date_breaks = "5 years") +
  labs(subtitle = "every five years")
# one year of data with a minor break for each month
g3 <- base +
  scale_x_date(
    limits = as.Date(c("2004-01-01", "2005-01-01")),
    date_labels = "%b %y",
    date_minor_breaks = "1 month"
  ) +
  labs(subtitle = "every month")
# four months of data with breaks every four weeks and weekly minor breaks
g4 <- base +
  scale_x_date(
    limits = as.Date(c("2004-01-01", "2004-05-01")),
    date_labels = "%m/%d/%Y",
    date_breaks = "4 weeks",
    date_minor_breaks = "1 weeks"
  ) +
  labs(subtitle = "every four weeks")
grid.arrange(
  g1, g2, g3, g4,
  nrow = 2,
  top = textGrob("Date scale needs special treatment", gp = gpar(fontsize = 16))
)
# facet_wrap(): panel arrangement, per-panel scales, and strip placement.
ggplot(mpg, aes(hwy, cty)) +
  geom_point() +
  # dir = "v" arranges panels column by column, dir = "h" row by row.
  # scales = "free", "free_x" or "free_y" frees the x and/or y scale of each
  # panel. The argument is spelled out as `scales`; the original `scale`
  # relied on partial argument matching.
  # strip.position controls where the strip is displayed.
  facet_wrap(
    ~ class,
    nrow = 3, dir = "v", scales = "free_y",
    strip.position = "left"
  ) +
  labs(title = "Control scale in facet_wrap and arrange panels")
# facet_grid() with one row of panels per manufacturer.
ggplot(mpg, aes(cty, model)) +
  geom_point() +
  # With free scales, space = "free" sizes each panel according to its own
  # scale. The argument is spelled out as `scales`; the original `scale`
  # relied on partial argument matching.
  facet_grid(manufacturer ~ ., scales = "free_y", space = "free") +
  theme(strip.text.y = element_text(angle = 0)) +
  labs(title = "facet_grid has one more control of display, the space")
# Extra layers in a faceted plot: layer data without the facet variable is
# drawn in every panel, layer data with it only in the matching panel.
df <- data.frame(x = rnorm(300), y = rnorm(300), z = letters[1:3])
ggplot(df, aes(x, y)) +
  geom_point(alpha = 0.5) +
  facet_wrap(~z) +
  # no z column: the red line appears in all panels
  geom_line(
    data = data.frame(x = -1:1, y = -1:1),
    size = 3, color = "red"
  ) +
  # z is "b" for every row: the blue line appears in panel b only
  geom_line(
    data = data.frame(x = 1:-1, y = -1:1, z = rep("b", 3)),
    color = "blue", size = 3
  ) +
  labs(
    title = "Facet: add new layers to selected panels",
    subtitle = "use the variable for facet"
  )
# Mathematical expressions in titles, axis labels, legend titles and
# annotations.
ggplot(mpg, aes(hwy, cty, color = drv)) +
  geom_point() +
  # inside quote(), "~" stands for a blank space; search for
  # "mathematical annotation in R" for the full expression syntax
  labs(
    title = quote( Math~works~at~any~title~and~labels~math~such~as~x^2 + x),
    subtitle = 'pay attention to space and google "mathematical annotation in R" for more math expression',
    x = "high way",
    y = quote(sqrt(x)+log(x)),
    color = quote(frac(x,y))
  ) +
  # parse = TRUE makes annotate() treat the label as a math expression
  annotate(
    "text",
    x = 20, y = 30,
    label = "annotate~label~x%+-%y",
    parse = TRUE
  )
# Control the order, direction, arrangement and title position of several
# legends at once.
ggplot(mpg, aes(factor(cyl), hwy, color = cty, shape = drv)) +
  geom_jitter(width = 0.2, aes(size = displ)) +
  scale_color_gradient2(low = "red", high = "blue", midpoint = 22) +
  guides(
    # shape legend first, ahead of color
    shape = guide_legend(
      order = 1,
      direction = "horizontal",
      override.aes = list(size = 10, color = "grey50"),
      title.position = "top"
    ),
    # continuous color bar, reversed and 3 cm wide
    color = guide_colorbar(
      reverse = TRUE,
      barwidth = unit(3, "cm"),
      direction = "horizontal",
      title.position = "top"
    ),
    # size legend second
    size = guide_legend(
      order = 2,
      direction = "horizontal",
      override.aes = list(shape = 1),
      nrow = 2,
      byrow = TRUE,
      title.position = "top"
    )
  ) +
  theme(
    panel.background = element_rect(fill = "grey10"),
    legend.key = element_rect(fill = NA)
  ) +
  # legend titles and plot titles in a single labs() call
  labs(
    color = "city mileage",
    shape = "drive train",
    size = "displacement",
    title = "Control appearance of legends",
    subtitle = "such as order of multiple legends, direction, arrangement, and title position of each legend"
  )
# Hide a legend that would appear by default, and force one that would not
# by mapping a constant string to an aes.
df <- data.frame(x = c(20, 30, 40), y = c(30, 20, 15), z = c("aa", "bb", "cc"))
ggplot(mpg, aes(hwy, cty)) +
  # show.legend = FALSE hides the color legend of this layer
  geom_point(aes(color = drv), show.legend = FALSE) +
  # the constant "setline" mapped to linetype creates a legend entry
  geom_line(data = df, aes(x, y, linetype = "setline")) +
  # shape rather than color, so it does not mix with the color aes above
  geom_point(data = df, aes(x, y, shape = z)) +
  scale_linetype_manual(
    labels = c("added layer"),
    values = c(setline = "solid")
  ) +
  labs(
    title = "Show and hide legends of multilayers",
    subtitle = "hide color legend that would appear by default\nforce line legend that does not appear by default using forced aes",
    shape = "additional points",
    linetype = "added line"
  )
# Gallery of annotate() geoms drawn from plain vectors on an empty plot.
ggplot(mpg, aes(hwy, cty)) +
  geom_blank() +
  # --- text ---
  # Inf places the label at the panel edge; hjust/vjust pull it inside
  annotate(
    "text", x = Inf, y = Inf, label = "test Inf at\nthis place",
    hjust = 1.1, vjust = 1.1
  ) +
  # one label repeated at several positions
  annotate(
    "text", x = c(20, 30, 40), y = c(10, 20, 15),
    label = "duplicated text at\nmultiple locations"
  ) +
  # rotated, parsed math expression
  annotate(
    "text", x = 15, y = 40, angle = 30,
    label = "paste(italic(R) ^ 2, \" = 0.75\")", parse = TRUE
  ) +
  # --- segments and curves with arrow heads ---
  annotate(
    "segment",
    x = c(20, 20, 20, 20), y = c(45, 40, 35, 30),
    xend = c(30, 30, 30, 30), yend = c(40, 35, 30, 25),
    arrow = arrow(angle = 20, length = unit(5, "mm"), type = "closed"),
    color = "red", size = 1:4 / 2
  ) +
  annotate(
    "curve", x = 15, y = 30, xend = 18, yend = 40, curvature = 0.9,
    arrow = arrow(angle = 20, length = unit(3, "mm"), type = "closed")
  ) +
  annotate("point", x = c(15, 18), y = c(30, 40), color = "red") +
  # --- rectangle ---
  annotate(
    "rect", xmin = 15, ymin = 15, xmax = 25, ymax = 25,
    fill = NA, color = "blue", linetype = "dotted"
  ) +
  # --- point range at (x, y) spanning ymin to ymax ---
  annotate("pointrange", x = 12, y = 25, ymin = 10, ymax = 30) +
  # --- points joined by a line ---
  annotate(
    "point", size = 5,
    x = 30:35, y = c(10, 11, 12, 11, 11, 10),
    color = c("orange", "red", "blue", "green", "cyan", "black")
  ) +
  annotate("line", x = 30:35, y = c(10, 11, 12, 11, 11, 10)) +
  # --- step line with its points ---
  annotate(
    "step", x = c(35, 38, 40, 43, 45),
    y = c(30, 35, 33, 40, 41), color = "green"
  ) +
  annotate(
    "point", x = c(35, 38, 40, 43, 45),
    y = c(30, 35, 33, 40, 41), color = "green"
  ) +
  # --- path with an arrow head ---
  annotate(
    "path", x = c(35, 36, 37, 36, 35), y = c(20, 20.5, 22, 24, 27),
    arrow = arrow(angle = 30, length = unit(3, "mm"))
  ) +
  # hline and vline do not work through annotate(): per the original author
  # this plots nothing but raises no error either
  annotate("hline", yintercept = 28) +
  labs(
    title = "Create simple geoms and annotations from vector data",
    subtitle = "complicated ones should be done with geom_xxx from data in dataframes"
  )
# The same empty plot under three fixed aspect ratios: 0.5, 1 and 1.5.
g1 <- ggplot(mpg, aes(hwy, cty)) +
  geom_blank() +
  coord_fixed(ratio = 0.5)
g2 <- ggplot(mpg, aes(hwy, cty)) +
  geom_blank() +
  coord_fixed(ratio = 1)
g3 <- ggplot(mpg, aes(hwy, cty)) +
  geom_blank() +
  coord_fixed(ratio = 1.5)
grid.arrange(
  g1, g2, g3,
  nrow = 1,
  top = textGrob("Use coord_fixed to control aspect ratio")
)
# Two ways to zoom in: through the coordinate system, or through scale
# limits.
g1 <- ggplot(mpg, aes(hwy, cty)) +
  geom_point() +
  stat_smooth(method = "lm") +
  labs(subtitle = "full range")
# limits set on the coordinate system
g2 <- g1 +
  coord_cartesian(xlim = c(20, 30), ylim = c(15, 25)) +
  labs(subtitle = "coord_cartesian(xlim=, ylim=) keep data outside limits")
# limits set on the scales
g3 <- g1 +
  xlim(20, 30) +
  ylim(15, 25) +
  labs(subtitle = "lose data outside limits with xlim()")
grid.arrange(
  g1, g2, g3,
  ncol = 1,
  top = textGrob("Keep or lose data outside of limits when zoom in")
)
# One stacked bar of cylinder counts, shown in Cartesian and polar
# coordinates.
base <- ggplot(mtcars, aes(factor(1), fill = factor(cyl))) +
  stat_count(width = 1)
# bar chart
g1 <- base
# pie chart: the angle is mapped to y
g2 <- base + coord_polar(theta = "y")
# bullseye chart: coord_polar() with its default theta
g3 <- base + coord_polar()
grid.arrange(
  g1, g2, g3,
  nrow = 1,
  top = textGrob("Pie chart and bullseye chart")
)
theme_update() is a funky function. It does two things: first, it changes the theme as specified by its parameters; second, it returns the old theme before update.
# Start from the default theme.
theme_set(theme_grey())
# theme_update() modifies the active theme; its return value is stored so the
# previous theme can be restored later.
old_theme <- theme_update(
  plot.background = element_rect(fill = "lightblue3", color = NA),
  panel.background = element_rect(fill = "lightblue", color = NA),
  axis.text = element_text(color = "linen"),
  axis.title = element_text(color = "linen")
)
df <- data.frame(x = 1:3, y = 1:3)
base <- ggplot(df, aes(x, y)) +
  geom_point()
# drawn with the updated theme
base + labs(title = "updated theme")
# restore the stored theme and draw again
theme_set(old_theme)
base + labs(title = "old theme")
# Turn a continuous variable into categories with ggplot2's cut_width(),
# cut_number() and cut_interval(), and use the result to group boxplots.
df <- data.frame(x = 1:10000, y = rnorm(10000))
g1 <- ggplot(df, aes(x, y)) +
  geom_boxplot(aes(group = cut_width(x, 2000, boundary = 0))) +
  labs(subtitle = "cut width of 2000 along x starting from left limit")
g2 <- ggplot(df, aes(x, y)) +
  geom_boxplot(aes(color = cut_number(y, 5))) +
  labs(subtitle = "cut y into 5 groups with about equal number")
g3 <- ggplot(df, aes(x, y)) +
  geom_boxplot(aes(fill = cut_interval(y, 5))) +
  labs(subtitle = "cut y into 5 intervals of equal width")
grid.arrange(
  g1, g2, g3,
  nrow = 1,
  top = textGrob(
    "Use ggplot2's cut_xxx() to categorize continuous variable",
    gp = gpar(fontsize = 16)
  )
)